/* `uniq' remove duplicate lines from a sorted file
   Copyright (C) 1986 Martin Minow, Richard M. Stallman

		       NO WARRANTY

  BECAUSE THIS PROGRAM IS LICENSED FREE OF CHARGE, WE PROVIDE ABSOLUTELY
NO WARRANTY, TO THE EXTENT PERMITTED BY APPLICABLE STATE LAW.  EXCEPT
WHEN OTHERWISE STATED IN WRITING, FREE SOFTWARE FOUNDATION, INC,
RICHARD M. STALLMAN AND/OR OTHER PARTIES PROVIDE THIS PROGRAM "AS IS"
WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING,
BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY
AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE PROGRAM PROVE
DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR
CORRECTION.

 IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW WILL RICHARD M.
STALLMAN, THE FREE SOFTWARE FOUNDATION, INC., AND/OR ANY OTHER PARTY
WHO MAY MODIFY AND REDISTRIBUTE THIS PROGRAM AS PERMITTED BELOW, BE
LIABLE TO YOU FOR DAMAGES, INCLUDING ANY LOST PROFITS, LOST MONIES, OR
OTHER SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
USE OR INABILITY TO USE (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR
DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY THIRD PARTIES OR
A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS) THIS
PROGRAM, EVEN IF YOU HAVE BEEN ADVISED OF THE POSSIBILITY OF SUCH
DAMAGES, OR FOR ANY CLAIM BY ANY OTHER PARTY.

		GENERAL PUBLIC LICENSE TO COPY

  1. You may copy and distribute verbatim copies of this source file
as you receive it, in any medium, provided that you conspicuously
and appropriately publish on each copy a valid copyright notice
"Copyright (C) 1986 Martin Minow, Richard M. Stallman"; and include
following the copyright notice a verbatim copy of the above disclaimer
of warranty and of this License.

  2. You may modify your copy or copies of this source file or
any portion of it, and copy and distribute such modifications under
the terms of Paragraph 1 above, provided that you also do the following:

    a) cause the modified files to carry prominent notices stating
    that you changed the files and the date of any change; and

    b) cause the whole of any work that you distribute or publish,
    that in whole or in part contains or is a derivative of this
    program or any part thereof, to be freely distributed
    and licensed to all third parties on terms identical to those
    contained in this License Agreement (except that you may choose
    to grant more extensive warranty protection to third parties,
    at your option).

  3. You may copy and distribute this program or any portion of it in
compiled, executable or object code form under the terms of Paragraphs
1 and 2 above provided that you do the following:

    a) cause each such copy to be accompanied by the
    corresponding machine-readable source code, which must
    be distributed under the terms of Paragraphs 1 and 2 above; or,

    b) cause each such copy to be accompanied by a
    written offer, with no time limit, to give any third party
    free (except for a nominal shipping charge) a machine readable
    copy of the corresponding source code, to be distributed
    under the terms of Paragraphs 1 and 2 above; or,

    c) in the case of a recipient of this program in compiled, executable
    or object code form (without the corresponding source code) you
    shall cause copies you distribute to be accompanied by a copy
    of the written offer of source code which you received along
    with the copy you received.

  4. You may not copy, sublicense, distribute or transfer this program
except as expressly provided under this License Agreement.  Any attempt
otherwise to copy, sublicense, distribute or transfer this program is void and
your rights to use the program under this License agreement shall be
automatically terminated.  However, parties who have received computer
software programs from you with this License Agreement will not have
their licenses terminated so long as such parties remain in full compliance.

 In other words, you are welcome to use, share and improve this program.
 You are forbidden to forbid anyone else to use, share and improve
 what you give them.   Help stamp out software-hoarding!  */

#include <ctype.h>
#include <errno.h>
#include <stdio.h>
#include <string.h>

/* A `struct linebuffer' is a structure which holds a line of text.
 `readline' reads a line from a stream into a linebuffer
 and works regardless of the length of the line.

 A linebuffer must be set up with `initbuffer' before its first use;
 `readline' enlarges the storage as needed.  */

struct linebuffer
  {
    /* Number of bytes currently allocated for `buffer'.  */
    long size;
    /* Allocated storage holding the line as a null-terminated string;
       `readline' does not store the trailing newline.  */
    char *buffer;
  };

/* File-scope line buffers.  Note that `check_file' declares its own
   local `lb1' and `lb2', which shadow these inside that function.  */
struct linebuffer lb1, lb2;

int skip_fields;		/* Number of fields to skip (set by -N)	*/
int skip_letters;		/* Number of letters to skip (set by +N)	*/
int check_letters;		/* Number of letters to test (set by -w N);
				   0 means compare to the end of the line	*/

/* 'c' to print line count before each line;
   'z' to do so with leading zeros,
   or 0 not to print a line count */

int countmode;

/* 'u' to print only lines appearing just once,
   'd' to print (one copy each of) lines appearing more than once,
   or 0 to do both */

int mode;

/* Old-style (parameterless) forward declarations for the functions
   defined later in this file whose return type is not `int'.  */

void initbuffer ();
long readline ();
void writeline ();
char *find_field ();
char *concat ();
long integer_arg ();

/* Program entry point.  Parse the command line, then filter the input.

   Recognized arguments:
     -w N     compare at most N letters of each line
     -N       skip N fields before comparing
     +N       skip N letters before comparing
     -c, -z   prefix each line with its count (-z pads with zeros)
     -d, -u   print only repeated / only unrepeated lines
   The first remaining argument names the input file and the second the
   output file; when absent, standard input and output are used.

   Returns 0 after the input has been processed.  */

int
main (argc, argv)
     int argc;
     char *argv[];
{
  int i;
  /* These must start out null: the parsing loop below uses
     "still null" to decide which file name an argument supplies.  */
  char *infile = 0, *outfile = 0;

  skip_letters = 0;
  skip_fields = 0;
  check_letters = 0;
  mode = 0;
  countmode = 0;

  for (i = 1; i < argc; i++)
    {
      if (!strcmp (argv[i], "-w"))
        {
          if (++i == argc)
            fatal ("-w specified with no argument");
          check_letters = integer_arg (argv[i]);
        }
      /* The <ctype.h> functions require an unsigned char value.  */
      else if (argv[i][0] == '-' && isdigit ((unsigned char) argv[i][1]))
        skip_fields = integer_arg (argv[i] + 1);
      else if (argv[i][0] == '+')
        skip_letters = integer_arg (argv[i] + 1);
      else if (argv[i][0] == '-')
        {
          /* A cluster of single-letter switches, e.g. "-cd".  */
          char *p = argv[i] + 1;
          char c;
          while ((c = *p++) != 0)
            switch (c)
              {
              case 'c':
              case 'z':
                countmode = c;
                break;

              case 'd':
              case 'u':
                mode = c;
                break;

              default:
                error ("unrecognized switch %s", argv[i]);
                /* Stop scanning this argument after the first bad letter.  */
                goto argdone;
              }
        argdone: ;
        }
      else if (!infile)
        infile = argv[i];
      else if (!outfile)
        outfile = argv[i];
      else
        {
          error ("too many arguments", 0);
          break;
        }
    }

  check_file (infile, outfile);
  return 0;
}

/* Prepare `linebuffer' for its first use: give it a freshly allocated
   text area and record that area's capacity.  */

void
initbuffer (linebuffer)
     struct linebuffer *linebuffer;
{
  enum { initial_capacity = 200 };
  char *storage = (char *) xmalloc (initial_capacity);

  linebuffer->buffer = storage;
  linebuffer->size = initial_capacity;
}

/* Read a line of text from `stream' into `linebuffer'.
 The line ends at a newline or at the point where `getc' reports
 end of file or an error; the newline itself is not stored and the
 text is always null-terminated.  The buffer is doubled in size
 whenever it fills up, so lines of any length are handled.
 Return the length of the line, not counting the terminating null.  */

long
readline (linebuffer, stream)
     struct linebuffer *linebuffer;
     FILE *stream;
{
  char *buffer = linebuffer->buffer;
  char *p = buffer;
  char *end = buffer + linebuffer->size;

  while (1)
    {
      int c = getc (stream);

      /* Make room before storing anything, so that there is always
         space for the terminating null as well.  */
      if (p == end)
        {
          /* Remember the position as an offset: the old block may move,
             after which pointers into it must not be used.  */
          long used = p - buffer;

          linebuffer->size *= 2;
          buffer = (char *) xrealloc (buffer, (int) linebuffer->size);
          linebuffer->buffer = buffer;
          p = buffer + used;
          /* The limit must reflect the NEW capacity, otherwise the
             "buffer full" test above would never trigger again.  */
          end = buffer + linebuffer->size;
        }

      if (c < 0 || c == '\n')
        {
          *p = 0;
          break;
        }
      *p++ = c;
    }

  return p - buffer;
}

/* Write the text held in `line' to `stream', followed by a newline,
 unless the global `mode' says this kind of line is to be suppressed.
 `linecount' is the number of additional occurrences of the line that
 were seen (0 for a line that appeared once); when `countmode' asks
 for it, the total number of occurrences is printed first.  */

void
writeline (line, stream, linecount)
     struct linebuffer *line;
     FILE *stream;
     int linecount;
{
  int repeated = (linecount != 0);

  /* -u wants only unrepeated lines, -d only repeated ones.  */
  if (mode == 'u' && repeated)
    return;
  if (mode == 'd' && !repeated)
    return;

  switch (countmode)
    {
    case 'c':
      fprintf (stream, "%7d\t", linecount + 1);
      break;

    case 'z':
      fprintf (stream, "%07d\t", linecount + 1);
      break;

    default:
      break;
    }

  fputs (line->buffer, stream);
  putc ('\n', stream);
}

/* Process one input file as specified */

int
check_file (infile, outfile)
     char *infile, *outfile;
{
  FILE *istream = infile ? fopen (infile, "r") : stdin;
  FILE *ostream = outfile ? fopen (outfile, "w") : stdout;
  struct linebuffer lb1, lb2;
  struct linebuffer *thisline, *prevline, *exch;
  char *prevfield, *thisfield;
  int failure = 0;
  int match_count = 0;

  if (!istream)
    pfatal_with_name (infile);

  if (!ostream)
    pfatal_with_name (outfile);

  thisline = &lb1;
  prevline = &lb2;

  initbuffer (thisline);
  initbuffer (prevline);

  readline (prevline, istream);
  prevfield = find_field (prevline);

  while (!feof (istream))
    {
      readline (thisline, istream);
      thisfield = find_field (thisline);
      if (!thisline->buffer[0] && feof (istream)) break;
      if (!compare (thisfield, prevfield))
	{
	  match_count++;
	}
      else
	{
	  writeline (prevline, ostream, match_count);
	  match_count = 0;
	  exch = prevline;  prevline = thisline; thisline = exch;
	  prevfield = thisfield;
	}
    }

  writeline (prevline, ostream, match_count);

  if (infile)
    fclose (istream);
  if (outfile)
    fclose (ostream);

  free (lb1.buffer);
  free (lb2.buffer);
}

/* Locate the part of `line' that takes part in the comparison.
 First pass over `skip_fields' fields, a field being an optional run
 of spaces and tabs followed by a run of other characters; then pass
 over up to `skip_letters' further characters.  The scan never moves
 beyond the terminating null.  Return a pointer into the line's text.  */

char *
find_field (line)
     struct linebuffer *line;
{
  char *cursor = line->buffer;
  int remaining;

  for (remaining = skip_fields; remaining > 0; remaining--)
    {
      /* Leading blanks belong to the field that follows them.  */
      while (*cursor == ' ' || *cursor == '\t')
        cursor++;

      while (*cursor != ' ' && *cursor != '\t')
        {
          /* Ran out of line while skipping fields: nothing to compare.  */
          if (*cursor == '\0')
            return cursor;
          cursor++;
        }
    }

  remaining = skip_letters;
  while (remaining > 0 && *cursor != '\0')
    {
      cursor++;
      remaining--;
    }

  return cursor;
}

/* Compare two comparison fields; the result is zero when they match.
 `old' and `new' point at the start of the fields to be compared (as
 found by `find_field'), not at the start of the lines.  When
 `check_letters' is nonzero at most that many characters are
 examined; otherwise the fields are compared in full.  */

int
compare (old, new)
     char *old;		/* Compare this field */
     char *new;		/* Against this field */
{
  return check_letters
         ? strncmp (old, new, check_letters)
         : strcmp (old, new);
}

/* Print error message and exit.
 The message is produced by `error': `s1' is a printf control string
 and `s2' is the argument for it.  The program then terminates with
 exit status 1, so this function does not return to its caller.  */

fatal (s1, s2)
     char *s1, *s2;
{
  error (s1, s2);
  exit (1);
}

/* Print error message.  `s1' is printf control string, `s2' is arg for it.
 The message is prefixed with "uniq: ", terminated with a newline and
 written to standard error, so that diagnostics never get mixed into
 the filtered text that the program writes to standard output.  */

error (s1, s2)
     char *s1, *s2;
{
  fputs ("uniq: ", stderr);
  fprintf (stderr, s1, s2);
  putc ('\n', stderr);
}

/* Report the system error currently recorded in `errno' for the file
 `name' (in the form "uniq: <system message> for <name>") and exit.
 Intended to be called right after a failed attempt to open `name'.  */

pfatal_with_name (name)
     char *name;
{
  /* Capture errno first; later library calls are free to change it.  */
  int err = errno;

  /* Build the complete text up front and print it through "%s", so
     that neither the system message nor the file name is ever
     interpreted as a printf control string.  */
  fatal ("%s", concat (strerror (err), " for ", name));
}

/* Build a new string consisting of `s1', `s2' and `s3' joined end to
 end.  The result lives in storage obtained from `xmalloc'.  */

char *
concat (s1, s2, s3)
     char *s1, *s2, *s3;
{
  int first = strlen (s1);
  int second = strlen (s2);
  int third = strlen (s3);
  char *joined = (char *) xmalloc (first + second + third + 1);
  char *tail = joined;

  /* Append each piece at the current tail and step past it.  */
  strcpy (tail, s1);
  tail += first;
  strcpy (tail, s2);
  tail += second;
  strcpy (tail, s3);
  tail += third;
  *tail = 0;

  return joined;
}

/* Allocate `size' bytes with malloc; if that fails, report
 "virtual memory exhausted" through `fatal' instead of returning
 a null result to the caller.

 NOTE(review): the address is carried in an `int' and callers cast it
 back to a pointer.  That only works where a pointer fits in an int;
 changing the return type also requires declaring this function
 before its first use -- confirm before porting.  */

int
xmalloc (size)
     int size;
{
  int block = malloc (size);

  if (block != 0)
    return block;

  fatal ("virtual memory exhausted", 0);
  return block;
}


/* Like realloc but get fatal error if memory is exhausted.
 `ptr' is the block to resize and `size' its new size in bytes.  */

int
xrealloc (ptr, size)
     char *ptr;
     int size;
{
  int result = realloc (ptr, size);
  if (!result)
    /* `fatal' takes two arguments; pass an explicit 0 for the unused
       one, as `xmalloc' does, rather than leaving it indeterminate.  */
    fatal ("virtual memory exhausted", 0);
  return result;
}

/* Parse string `s' as an integer, using decimal radix by default,
 but allowing octal and hex numbers as in C: a leading "0x" or "0X"
 selects hexadecimal and any other leading "0" selects octal.
 Every character after the prefix must be a valid digit of the
 selected radix; otherwise the program exits through `fatal' with
 an "invalid integer argument" message.  Return the parsed value.  */

long
integer_arg (s)
     char *s;
{
  long value = 0;
  int radix = 10;
  char *p = s;
  int c;

  if (*p == '0')
    {
      p++;
      if (*p == 'x' || *p == 'X')
        {
          radix = 16;
          p++;
        }
      else
        radix = 8;
    }

  while ((c = *p++) != 0)
    {
      int digit;

      if (c >= '0' && c <= '9')
        digit = c - '0';
      else if (c >= 'a' && c <= 'f')
        digit = c - 'a' + 10;
      else if (c >= 'A' && c <= 'F')
        digit = c - 'A' + 10;
      else
        break;

      /* Rejects 8 and 9 in octal and letters outside hexadecimal.  */
      if (digit >= radix)
        break;

      value = value * radix + digit;
    }

  /* The loop stops early only on a character that is not a digit of
     the radix; in that case `c' is nonzero.  */
  if (c)
    fatal ("invalid integer argument %s", s);
  return value;
}
